{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.2解析真实地址抓取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-25T03:15:36.093580Z",
     "start_time": "2017-09-25T03:15:34.235188Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/**/ typeof jQuery1124049866736766120545_1506309304525 === 'function' && jQuery1124049866736766120545_1506309304525({\"results\":{\"parents\":[{\"replySeq\":32264140,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264140,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! - 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:54:25.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第21条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264137,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264137,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:54:18.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第20条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264133,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264133,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:54:11.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第19条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264132,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264132,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:54:05.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第18条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264129,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264129,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:53:59.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第17条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264124,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264124,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:53:53.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第16条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264121,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264121,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:53:46.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第15条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264120,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264120,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:53:41.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第14条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264117,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264117,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:53:36.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第13条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":32264113,\"name\":\"Santos\",\"memberId\":\"tangsongsky@gmail.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_4\",\"memberUrl\":\"https://livere.com\",\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":32264113,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world! 
- 大数据@唐松Santos\",\"site\":\"http://www.santostang.com/2017/03/02/hello-world/\",\"email\":null,\"ipAddress\":\"67.249.137.235\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2017-09-20T01:53:30.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"第12条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":3871836,\"memberGroupSeq\":25413747,\"memberSeq\":25870984,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null}],\"children\":[],\"quotations\":[]},\"resultCode\":200,\"resultMessage\":\"Okay, livere\"});\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "\n",
    "link = \"https://api-zero.livere.com/v1/comments/list?callback=jQuery1124049866736766120545_1506309304525&limit=10&offset=1&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506309304527\"\n",
    "headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'} \n",
    "\n",
    "r = requests.get(link, headers= headers)\n",
    "print (r.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:07:15.081352Z",
     "start_time": "2017-09-24T16:07:15.075611Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 获取 json 的 string\n",
    "json_string = r.text\n",
    "json_string = json_string[json_string.find('{'):-2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:09:00.500018Z",
     "start_time": "2017-09-24T16:09:00.492953Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "第21条测试评论\n",
      "第20条测试评论\n",
      "第19条测试评论\n",
      "第18条测试评论\n",
      "第17条测试评论\n",
      "第16条测试评论\n",
      "第15条测试评论\n",
      "第14条测试评论\n",
      "第13条测试评论\n",
      "第12条测试评论\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "json_data = json.loads(json_string)\n",
    "comment_list = json_data['results']['parents']\n",
    "\n",
    "for eachone in comment_list:\n",
    "    message = eachone['content']\n",
    "    print (message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:22:43.422456Z",
     "start_time": "2017-09-24T16:22:39.406885Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=1&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\n",
      "第21条测试评论\n",
      "第20条测试评论\n",
      "第19条测试评论\n",
      "第18条测试评论\n",
      "第17条测试评论\n",
      "第16条测试评论\n",
      "第15条测试评论\n",
      "第14条测试评论\n",
      "第13条测试评论\n",
      "第12条测试评论\n",
      "https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=2&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\n",
      "第11条测试评论\n",
      "第10条测试评论\n",
      "第9条测试评论\n",
      "第8条测试评论\n",
      "第7条测试评论\n",
      "第6条测试评论\n",
      "第5条测试评论\n",
      "第4条测试评论\n",
      "第3条测试评论\n",
      "第二条测试评论\n",
      "https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=3&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\n",
      "第一条测试评论\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "def single_page_comment(link):\n",
    "    headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'} \n",
    "    r = requests.get(link, headers= headers)\n",
    "    # 获取 json 的 string\n",
    "    json_string = r.text\n",
    "    json_string = json_string[json_string.find('{'):-2]\n",
    "    json_data = json.loads(json_string)\n",
    "    comment_list = json_data['results']['parents']\n",
    "    \n",
    "    for eachone in comment_list:\n",
    "        message = eachone['content']\n",
    "        print (message)\n",
    "\n",
    "for page in range(1,4):\n",
    "    link1 = \"https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=\"\n",
    "    link2 = \"&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\"\n",
    "    page_str = str(page)\n",
    "    link = link1 + page_str + link2\n",
    "    print (link)\n",
    "    single_page_comment(link)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.3通过 selenium 模拟浏览器抓取"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.3.1 selenium 的安装与基本介绍"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:24:30.561998Z",
     "start_time": "2017-09-24T16:24:27.616197Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting selenium\n",
      "  Downloading selenium-3.5.0-py2.py3-none-any.whl (921kB)\n",
      "\u001b[K    100% |████████████████████████████████| 921kB 579kB/s ta 0:00:01\n",
      "\u001b[?25hInstalling collected packages: selenium\n",
      "Successfully installed selenium-3.5.0\n"
     ]
    }
   ],
   "source": [
    "! pip install selenium"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-25T03:27:13.195188Z",
     "start_time": "2017-09-25T03:26:55.301264Z"
    },
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "driver = webdriver.Firefox()\n",
    "driver.get(\"http://www.santostang.com/2017/03/02/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.firefox.firefox_binary import FirefoxBinary\n",
    "\n",
    "caps = webdriver.DesiredCapabilities().FIREFOX\n",
    "caps[\"marionette\"] = False\n",
    "binary = FirefoxBinary(r'D:\\Program Files\\Mozilla Firefox\\firefox.exe')\n",
    "#把上述地址改成你电脑中Firefox程序的地址\n",
    "driver = webdriver.Firefox(firefox_binary=binary, capabilities=caps)\n",
    "driver.get(\"http://www.santostang.com/2017/03/02/hello-world/\")\n",
    "driver.switch_to.frame(driver.find_element_by_css_selector(\"iframe[title='livere']\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-25T03:20:30.617650Z",
     "start_time": "2017-09-25T03:20:30.602933Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'<html lang=\"zh-CN\"><head>\\n<meta charset=\"UTF-8\">\\n<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1, maximum-scale=1\">\\n<title>Hello world! - 数据科学@唐松Santos</title>\\n<meta name=\"description\" content=\"Welcome to WordPress. This is your first post. Edit or delete it, then start writing!\">\\n<meta name=\"keywords\" content=\"\">\\n<link rel=\"shortcut icon\" href=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/images/favicon.ico\" type=\"image/x-icon\">\\n<link rel=\"stylesheet\" href=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/css/bootstrap.min.css\">\\n<link rel=\"stylesheet\" href=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/css/font-awesome.min.css\">\\n<script src=\"http://push.zhanzhang.baidu.com/push.js\"></script><script src=\"https://cdn-city.livere.com/js/embed.dist.js\" async=\"\"></script><script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/js/jquery.min.js\"></script>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/js/bootstrap.min.js\"></script>\\n<link rel=\"stylesheet\" href=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/style.css\">\\n<link rel=\"pingback\" href=\"http://www.santostang.com/xmlrpc.php\">\\n<link rel=\"dns-prefetch\" href=\"//s.w.org\">\\n<link rel=\"stylesheet\" id=\"crayon-css\" href=\"http://www.santostang.com/wp-content/plugins/crayon-syntax-highlighter/css/min/crayon.min.css?ver=_2.7.2_beta\" type=\"text/css\" media=\"all\">\\n<link rel=\"https://api.w.org/\" href=\"http://www.santostang.com/wp-json/\">\\n<link rel=\"alternate\" type=\"application/json+oembed\" href=\"http://www.santostang.com/wp-json/oembed/1.0/embed?url=http%3A%2F%2Fwww.santostang.com%2F2017%2F03%2F02%2Fhello-world%2F\">\\n<link rel=\"alternate\" type=\"text/xml+oembed\" 
href=\"http://www.santostang.com/wp-json/oembed/1.0/embed?url=http%3A%2F%2Fwww.santostang.com%2F2017%2F03%2F02%2Fhello-world%2F&amp;format=xml\">\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/plugins/si-captcha-for-wordpress/captcha/si_captcha.js?ver=1506309578\"></script>\\n<!-- begin SI CAPTCHA Anti-Spam - login/register form style -->\\n<style type=\"text/css\">\\n.si_captcha_small { width:175px; height:45px; padding-top:10px; padding-bottom:10px; }\\n.si_captcha_large { width:250px; height:60px; padding-top:10px; padding-bottom:10px; }\\nimg#si_image_com { border-style:none; margin:0; padding-right:5px; float:left; }\\nimg#si_image_reg { border-style:none; margin:0; padding-right:5px; float:left; }\\nimg#si_image_log { border-style:none; margin:0; padding-right:5px; float:left; }\\nimg#si_image_side_login { border-style:none; margin:0; padding-right:5px; float:left; }\\nimg#si_image_checkout { border-style:none; margin:0; padding-right:5px; float:left; }\\nimg#si_image_jetpack { border-style:none; margin:0; padding-right:5px; float:left; }\\nimg#si_image_bbpress_topic { border-style:none; margin:0; padding-right:5px; float:left; }\\n.si_captcha_refresh { border-style:none; margin:0; vertical-align:bottom; }\\ndiv#si_captcha_input { display:block; padding-top:15px; padding-bottom:5px; }\\nlabel#si_captcha_code_label { margin:0; }\\ninput#si_captcha_code_input { width:65px; }\\np#si_captcha_code { clear: left; padding-top:10px; }\\n.si-captcha-jetpack-error { color:#DC3232; }\\n</style>\\n<!-- end SI CAPTCHA Anti-Spam - login/register form style -->\\n</head>\\n\\n<body>\\n<header id=\"header\">\\n  <div class=\"avatar\"><img src=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/images/avatar.jpg\" alt=\"数据科学@唐松Santos\" class=\"img-circle\" width=\"50%\"></div>\\n  <h3 id=\"name\">数据科学@唐松Santos</h3>\\n  <div class=\"sns\">\\n    <a href=\"http://www.santostang.com/feed/\" target=\"_blank\" rel=\"nofollow\" 
title=\"RSS\"><i class=\"fa fa-rss\" aria-hidden=\"true\"></i></a>\\n        <a href=\"http://weibo.com/santostang\" target=\"_blank\" rel=\"nofollow\" title=\"Weibo\"><i class=\"fa fa-weibo\" aria-hidden=\"true\"></i></a>\\n                <a href=\"https://www.linkedin.com/in/santostang\" target=\"_blank\" rel=\"nofollow\" title=\"Linkedin\"><i class=\"fa fa-linkedin\" aria-hidden=\"true\"></i></a>\\n                <a href=\"mailto:tangsongsky@gmail.com\" target=\"_blank\" rel=\"nofollow\" title=\"envelope\"><i class=\"fa fa-envelope\" aria-hidden=\"true\"></i></a>\\n          </div>\\n  <div class=\"nav\">\\n   <ul><li><a href=\"http://www.santostang.com/\">首页</a></li>\\n<li><a href=\"http://www.santostang.com/about-me/\">关于我</a></li>\\n<li><a href=\"http://www.santostang.com/scrapy_code/\">爬虫代码</a></li>\\n<li><a href=\"http://www.santostang.com/wp-login.php\">登录</a></li>\\n</ul>  </div>\\n</header>\\n<div id=\"main\">\\n    <article class=\"col-md-8 col-md-offset-2 view clearfix\">\\n    <h1 class=\"view-title\">Hello world!</h1>\\n    <div class=\"view-meta\">\\n      <span>作者: santos1993</span>\\n      <span>分类: <a href=\"http://www.santostang.com/category/bigdata/\" rel=\"category tag\">大数据</a>,<a href=\"http://www.santostang.com/category/big-data-marketing/\" rel=\"category tag\">大数据营销</a></span>\\n      <span>发布时间: 2017-03-02 05:03</span>\\n      <span></span>\\n    </div>\\n    <div class=\"view-content\">\\n      <p>Welcome to WordPress. This is your first post. 
Edit or delete it, then start writing!</p>\\n    </div>\\n    <section class=\"view-tag\">\\n      <div class=\"pull-left\"><i class=\"fa fa-tags\"></i> </div>\\n    </section>\\n    <section class=\"support-author\">\\n      <p>如果觉得我的文章对您有用，请随意打赏。您的支持将鼓励我继续创作！</p>\\n      <button type=\"button\" class=\"btn btn-primary\" data-toggle=\"modal\" data-target=\"#myModal\"><i class=\"fa fa-cny\" aria-hidden=\"true\"></i> 打赏支持</button>\\n    </section>\\n    <section id=\"comments\">\\n      <!-- LiveRe City install code -->\\n<div id=\"lv-container\" data-id=\"city\" data-uid=\"MTAyMC8yODU4My81MTU0\">\\n<script type=\"text/javascript\">\\nvar refer = \"http://www.santostang.com/2017/03/02/hello-world/\".replace(\"http://\",\"\");\\n   (function(d, s) {\\n       var j, e = d.getElementsByTagName(s)[0];\\n\\n       if (typeof LivereTower === \\'function\\') { return; }\\n\\n       j = d.createElement(s);\\n       j.src = \\'https://cdn-city.livere.com/js/embed.dist.js\\';\\n       j.async = true;\\n\\n       e.parentNode.insertBefore(j, e);\\n   })(document, \\'script\\');\\n</script>\\n<noscript>Please activate JavaScript for write a comment in LiveRe</noscript>\\n<iframe title=\"livere\" scrolling=\"no\" src=\"https://livere.me/comment/city?id=city&amp;refer=www.santostang.com%2F2017%2F03%2F02%2Fhello-world%2F&amp;uid=MTAyMC8yODU4My81MTU0&amp;site=http%3A%2F%2Fwww.santostang.com%2F2017%2F03%2F02%2Fhello-world%2F%3Fd%3D1&amp;title=Hello%20world!%20-%20%E6%95%B0%E6%8D%AE%E7%A7%91%E5%AD%A6%40%E5%94%90%E6%9D%BESantos\" style=\"min-width: 100%; width: 100px; height: 1556px; overflow: hidden; border: 0px none; z-index: 124212;\" id=\"lv-comment-739\" frameborder=\"0\"></iframe></div>\\n<!-- completed City install code -->    </section>\\n  </article>\\n    <section class=\"col-md-8 col-md-offset-2 clearfix\">\\n    <div class=\"read\">\\n      <div class=\"read-head\"> <i class=\"fa fa-book\"></i> 更多阅读 </div>\\n      <div class=\"read-list row\">\\n        <div 
class=\"col-md-6\">\\n          <ul>\\n                      </ul>\\n        </div>\\n        <div class=\"col-md-6\">\\n          <ul>\\n                        <li><a href=\"http://www.santostang.com/2017/03/06/%e3%80%90%e7%88%ac%e8%99%ab%e4%ba%8c%e3%80%91%e7%88%ac%e8%99%ab%e7%9a%84%e6%a1%86%e6%9e%b6%e5%92%8c%e5%9f%ba%e6%9c%ac%e8%ae%ae%e9%a2%98/\" title=\"【爬虫二】爬虫的框架和基本议题\">【爬虫二】爬虫的框架和基本议题</a></li>\\n                        <li><a href=\"http://www.santostang.com/2017/03/08/hello-python/\" title=\"Hello Python!\">Hello Python!</a></li>\\n                        <li><a href=\"http://www.santostang.com/2017/03/06/%e3%80%90%e7%88%ac%e8%99%ab%e4%b8%80%e3%80%91%e6%9c%80%e7%ae%80%e5%8d%95%e7%9a%84%e7%88%ac%e8%99%ab%ef%bc%8c%e9%9b%b6%e5%9f%ba%e7%a1%80%e6%95%99%e5%ad%a6/\" title=\"【爬虫一】最简单的爬虫，零基础教学\">【爬虫一】最简单的爬虫，零基础教学</a></li>\\n                        <li><a href=\"http://www.santostang.com/2017/03/07/echarts%e5%ad%a6%e4%b9%a0%e7%ac%94%e8%ae%b01-%e4%bd%bf%e7%94%a8%e6%a8%a1%e5%9d%97%e5%8c%96%e5%8d%95%e6%96%87%e4%bb%b6%e5%bc%95%e5%85%a5/\" title=\"echarts学习笔记(1) — 模块化单文件引入\">echarts学习笔记(1) — 模块化单文件引入</a></li>\\n                        <li><a href=\"http://www.santostang.com/2017/03/07/echarts%e5%ad%a6%e4%b9%a0%e7%ac%94%e8%ae%b02-%e5%8d%95%e9%a1%b5%e9%9d%a2%e5%a4%9a%e5%bc%a0%e5%9b%be%e8%a1%a8/\" title=\"echarts学习笔记(2) — 同一页面多图表\">echarts学习笔记(2) — 同一页面多图表</a></li>\\n                        <li><a href=\"http://www.santostang.com/2017/03/02/hello-world/\" title=\"Hello world!\">Hello world!</a></li>\\n                      </ul>\\n        </div>\\n      </div>\\n    </div>\\n    <div class=\"read\">\\n      <div class=\"read-head\"> <i class=\"fa fa-tags\"></i> 标签云 </div>\\n      <div class=\"read-list\">\\n        <a href=\"http://www.santostang.com/tag/echarts/\" class=\"tag-link-11 tag-link-position-1\" title=\"1个话题\" style=\"color:#cf1d76;font-size: 8pt;;\">Echarts</a>\\n<a href=\"http://www.santostang.com/tag/python/\" class=\"tag-link-6 tag-link-position-2\" 
title=\"2个话题\" style=\"color:#750414;font-size: 16.4pt;;\">python</a>\\n<a href=\"http://www.santostang.com/tag/%e5%a4%a7%e6%95%b0%e6%8d%ae/\" class=\"tag-link-8 tag-link-position-3\" title=\"3个话题\" style=\"color:#2bc12a;font-size: 22pt;;\">大数据</a>\\n<a href=\"http://www.santostang.com/tag/%e6%95%b0%e6%8d%ae%e5%8f%af%e8%a7%86%e5%8c%96/\" class=\"tag-link-10 tag-link-position-4\" title=\"1个话题\" style=\"color:#49df72;font-size: 8pt;;\">数据可视化</a>\\n<a href=\"http://www.santostang.com/tag/%e7%bd%91%e7%bb%9c%e7%88%ac%e8%99%ab/\" class=\"tag-link-7 tag-link-position-5\" title=\"2个话题\" style=\"color:#1c835f;font-size: 16.4pt;;\">网络爬虫</a>      </div>\\n    </div>\\n  </section>\\n</div>\\n<!--modal-->\\n<div class=\"modal fade\" id=\"myModal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"myModalLabel\">\\n  <div class=\"modal-dialog\" role=\"document\">\\n    <div class=\"modal-content\">\\n      <div class=\"modal-header\">\\n        <button type=\"button\" class=\"close\" data-dismiss=\"modal\" aria-label=\"Close\"><span aria-hidden=\"true\">×</span></button>\\n        <h4 class=\"modal-title\" id=\"myModalLabel\"><i class=\"fa fa-cny\" aria-hidden=\"true\"></i> 打赏支持</h4>\\n      </div>\\n      <div class=\"modal-body text-center\">\\n        <p><img src=\"http://www.santostang.com/media/Alipay.jpeg\" alt=\"Santos支付宝\" style=\"margin: 0 8%;\" height=\"180\" width=\"180\" border=\"0\"><img src=\"http://www.santostang.com/media/wechat.jpeg\" alt=\"Santos微信钱包\" style=\"margin: 0 8%;\" height=\"180\" width=\"180\" border=\"0\"></p>\\n        <p>扫描二维码，输入您要打赏的金额</p>\\n      </div>\\n    </div>\\n  </div>\\n</div>\\n<!--modal-->\\n\\n<footer id=\"footer\">\\n  <div class=\"copyright\">\\n    <p><i class=\"fa fa-copyright\" aria-hidden=\"true\"></i> 2017 <b>数据科学@唐松Santos</b></p>\\n    <p>Powered by <b>WordPress</b>. 
Theme by <a href=\"http://tangjie.me/jiestyle\" title=\"JieStyle\" target=\"_blank\"><b>JieStyle Two</b></a> | </p>\\n  </div>\\n  <div style=\"display:none;\">  </div>\\n</footer>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/js/skel.min.js\"></script> \\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/js/util.min.js\"></script> \\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/themes/SongStyle-Two/js/nav.js\"></script>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-includes/js/jquery/jquery.js?ver=1.12.4\"></script>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-includes/js/jquery/jquery-migrate.min.js?ver=1.4.1\"></script>\\n<script type=\"text/javascript\">\\n/* <![CDATA[ */\\nvar CrayonSyntaxSettings = {\"version\":\"_2.7.2_beta\",\"is_admin\":\"0\",\"ajaxurl\":\"http:\\\\/\\\\/www.santostang.com\\\\/wp-admin\\\\/admin-ajax.php\",\"prefix\":\"crayon-\",\"setting\":\"crayon-setting\",\"selected\":\"crayon-setting-selected\",\"changed\":\"crayon-setting-changed\",\"special\":\"crayon-setting-special\",\"orig_value\":\"data-orig-value\",\"debug\":\"\"};\\nvar CrayonSyntaxStrings = {\"copy\":\"\\\\u4f7f\\\\u7528 %s \\\\u590d\\\\u5236\\\\uff0c\\\\u4f7f\\\\u7528 %s \\\\u7c98\\\\u8d34\\\\u3002\",\"minimize\":\"\\\\u70b9\\\\u51fb\\\\u5c55\\\\u5f00\\\\u4ee3\\\\u7801\"};\\n/* ]]> */\\n</script>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-content/plugins/crayon-syntax-highlighter/js/min/crayon.min.js?ver=_2.7.2_beta\"></script>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-includes/js/comment-reply.min.js?ver=4.7.6\"></script>\\n<script type=\"text/javascript\" src=\"http://www.santostang.com/wp-includes/js/wp-embed.min.js?ver=4.7.6\"></script>\\n<script>\\n(function(){\\n    var bp = document.createElement(\\'script\\');\\n    var curProtocol = 
window.location.protocol.split(\\':\\')[0];\\n    if (curProtocol === \\'https\\') {\\n        bp.src = \\'https://zz.bdstatic.com/linksubmit/push.js\\';        \\n    }\\n    else {\\n        bp.src = \\'http://push.zhanzhang.baidu.com/push.js\\';\\n    }\\n    var s = document.getElementsByTagName(\"script\")[0];\\n    s.parentNode.insertBefore(bp, s);\\n})();\\n</script>\\n\\n\\n<div id=\"titleBar\"><a href=\"#header\" class=\"toggle\"></a><span class=\"title\">数据科学@唐松Santos</span></div><iframe title=\"livere\" scrolling=\"no\" src=\"https://livere.me/sidebar/city?id=city&amp;refer=www.santostang.com%2F2017%2F03%2F02%2Fhello-world%2F&amp;uid=MTAyMC8yODU4My81MTU0&amp;site=http%3A%2F%2Fwww.santostang.com%2F2017%2F03%2F02%2Fhello-world%2F%3Fd%3D1&amp;title=Hello%20world!%20-%20%E6%95%B0%E6%8D%AE%E7%A7%91%E5%AD%A6%40%E5%94%90%E6%9D%BESantos\" style=\"width: 100%; overflow: hidden; border: 0px none; position: fixed; left: 0px; top: 0px; z-index: 2147483647; display: none; height: 871px;\" id=\"lv-utils-370\" frameborder=\"0\"></iframe></body></html>'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "driver.page_source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-25T03:27:13.248457Z",
     "start_time": "2017-09-25T03:27:13.198704Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "NoSuchElementException",
     "evalue": "Message: Unable to locate element: div.reply-content\n",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNoSuchElementException\u001b[0m                    Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-9-3aa7391eab6c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcomment\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_element_by_css_selector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'div.reply-content'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mcontent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomment\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_element_by_tag_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'p'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/santostang/anaconda/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mfind_element_by_css_selector\u001b[0;34m(self, css_selector)\u001b[0m\n\u001b[1;32m    496\u001b[0m             \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_element_by_css_selector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'#foo'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    497\u001b[0m         \"\"\"\n\u001b[0;32m--> 498\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_element\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mCSS_SELECTOR\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcss_selector\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    499\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    500\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mfind_elements_by_css_selector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcss_selector\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/santostang/anaconda/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mfind_element\u001b[0;34m(self, by, value)\u001b[0m\n\u001b[1;32m    830\u001b[0m         return self.execute(Command.FIND_ELEMENT, {\n\u001b[1;32m    831\u001b[0m             \u001b[0;34m'using'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mby\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 832\u001b[0;31m             'value': value})['value']\n\u001b[0m\u001b[1;32m    833\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    834\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mfind_elements\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mby\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mID\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/santostang/anaconda/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m    295\u001b[0m         \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    296\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    298\u001b[0m             response['value'] = self._unwrap_value(\n\u001b[1;32m    299\u001b[0m                 response.get('value', None))\n",
      "\u001b[0;32m/Users/santostang/anaconda/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m    192\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mexception_class\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mUnexpectedAlertPresentException\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m'alert'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    193\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'alert'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 194\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    195\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    196\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNoSuchElementException\u001b[0m: Message: Unable to locate element: div.reply-content\n"
     ]
    }
   ],
   "source": [
    "# Try to read the first comment straight from the top-level document.\n",
    "# As the recorded output of this cell shows, the lookup raises\n",
    "# NoSuchElementException here; the next cell repeats it after switching\n",
    "# into the 'livere' iframe, where it succeeds.\n",
    "reply_selector = 'div.reply-content'\n",
    "comment = driver.find_element_by_css_selector(reply_selector)\n",
    "content = comment.find_element_by_tag_name('p')\n",
    "print(content.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-25T03:27:19.609710Z",
     "start_time": "2017-09-25T03:27:19.515340Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "第21条测试评论\n"
     ]
    }
   ],
   "source": [
    "# Locate the 'livere' iframe and move the driver's context into it;\n",
    "# the element lookups below are then resolved inside that frame.\n",
    "livere_frame = driver.find_element_by_css_selector(\"iframe[title='livere']\")\n",
    "driver.switch_to.frame(livere_frame)\n",
    "\n",
    "# Read the paragraph text of the first comment block in the frame.\n",
    "comment = driver.find_element_by_css_selector('div.reply-content')\n",
    "content = comment.find_element_by_tag_name('p')\n",
    "print(content.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.3.3 Selenium获取文章的所有评论"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-25T03:31:31.827281Z",
     "start_time": "2017-09-25T03:31:09.276497Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "第21条测试评论\n",
      "第20条测试评论\n",
      "第19条测试评论\n",
      "第18条测试评论\n",
      "第17条测试评论\n",
      "第16条测试评论\n",
      "第15条测试评论\n",
      "第14条测试评论\n",
      "第13条测试评论\n",
      "第12条测试评论\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.firefox.firefox_binary import FirefoxBinary\n",
    "import time\n",
    "\n",
    "# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole\n",
    "# selenium package, so editing it in place would leak into every later driver.\n",
    "caps = webdriver.DesiredCapabilities.FIREFOX.copy()\n",
    "caps[\"marionette\"] = False\n",
    "# Change the path below to where Firefox is installed on your machine.\n",
    "binary = FirefoxBinary(r'D:\\Program Files\\Mozilla Firefox\\firefox.exe')\n",
    "driver = webdriver.Firefox(firefox_binary=binary, capabilities=caps)\n",
    "# Let element lookups poll for up to 10 seconds, in case the comment iframe or\n",
    "# its replies are not in the DOM yet when the page load returns. Without this,\n",
    "# find_elements can return an empty list and the loop prints nothing.\n",
    "driver.implicitly_wait(10)\n",
    "driver.get(\"http://www.santostang.com/2017/03/02/hello-world/\")\n",
    "# The comments are looked up inside the 'livere' iframe, so switch into it first.\n",
    "driver.switch_to.frame(driver.find_element_by_css_selector(\"iframe[title='livere']\"))\n",
    "\n",
    "# Print the paragraph text of every comment block found in the frame.\n",
    "comments = driver.find_elements_by_css_selector('div.reply-content')\n",
    "for eachcomment in comments:\n",
    "    content = eachcomment.find_element_by_tag_name('p')\n",
    "    print(content.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.3.4 Selenium的高级操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Control CSS loading through a Firefox profile preference.\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.firefox.firefox_binary import FirefoxBinary\n",
    "\n",
    "# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole\n",
    "# selenium package, so editing it in place would leak into every later driver.\n",
    "caps = webdriver.DesiredCapabilities.FIREFOX.copy()\n",
    "caps[\"marionette\"] = False\n",
    "\n",
    "# Change the path below to where Firefox is installed on your machine.\n",
    "binary = FirefoxBinary(r'D:\\Program Files\\Mozilla Firefox\\firefox.exe')\n",
    "fp = webdriver.FirefoxProfile()\n",
    "# NOTE(review): the value 2 presumably means 'block' for this permission\n",
    "# preference - confirm against the Firefox preference documentation.\n",
    "fp.set_preference(\"permissions.default.stylesheet\", 2)\n",
    "\n",
    "driver = webdriver.Firefox(firefox_binary=binary, firefox_profile=fp, capabilities=caps)\n",
    "driver.get(\"http://www.santostang.com/2017/03/02/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Restrict image loading through a Firefox profile preference.\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.firefox.firefox_binary import FirefoxBinary\n",
    "\n",
    "# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole\n",
    "# selenium package, so editing it in place would leak into every later driver.\n",
    "caps = webdriver.DesiredCapabilities.FIREFOX.copy()\n",
    "caps[\"marionette\"] = False\n",
    "# Change the path below to where Firefox is installed on your machine.\n",
    "binary = FirefoxBinary(r'D:\\Program Files\\Mozilla Firefox\\firefox.exe')\n",
    "fp = webdriver.FirefoxProfile()\n",
    "# NOTE(review): the value 2 presumably means 'block' for this permission\n",
    "# preference - confirm against the Firefox preference documentation.\n",
    "fp.set_preference(\"permissions.default.image\", 2)\n",
    "driver = webdriver.Firefox(firefox_binary=binary, firefox_profile=fp, capabilities=caps)\n",
    "driver.get(\"http://www.santostang.com/2017/03/02/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Restrict JavaScript execution through a Firefox profile preference.\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.firefox.firefox_binary import FirefoxBinary\n",
    "\n",
    "# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole\n",
    "# selenium package, so editing it in place would leak into every later driver.\n",
    "caps = webdriver.DesiredCapabilities.FIREFOX.copy()\n",
    "caps[\"marionette\"] = False\n",
    "\n",
    "# Change the path below to where Firefox is installed on your machine.\n",
    "binary = FirefoxBinary(r'D:\\Program Files\\Mozilla Firefox\\firefox.exe')\n",
    "fp = webdriver.FirefoxProfile()\n",
    "# Turn the browser's JavaScript engine off for this profile.\n",
    "fp.set_preference(\"javascript.enabled\", False)\n",
    "driver = webdriver.Firefox(firefox_binary=binary, firefox_profile=fp, capabilities=caps)\n",
    "driver.get(\"http://www.santostang.com/2017/03/02/hello-world/\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "117px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
